{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 基尼系数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "先看看基尼系数 $G = 1 - \\sum_{i=1}^{k} p_i^2$ 与之前信息熵的函数有什么不同（二分类时 $G = 1 - p^2 - (1-p)^2 = -2p^2 + 2p$）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAIABJREFUeJzt3Xd4lfXdx/H3N5sMAmQAGYQVCHuFqagMEaiixQEoVSuKW+torbXaVp/WVp/a1tYFDpzgqANXsQ6UDWFvCBmEsBIgITs5ye/54xzypBTIgeTOfcb3dV25rjPuc87nZpxPfvf43WKMQSmllAIIsDuAUkopz6GloJRSqp6WglJKqXpaCkoppeppKSillKqnpaCUUqqeloJSSql6WgpKKaXqaSkopZSqF2R3gLMVGxtrOnfubHcMpZTyKmvXri00xsQ1tpzXlULnzp3JyMiwO4ZSSnkVEcl1ZzndfKSUUqqeloJSSql6WgpKKaXqaSkopZSqp6WglFKqnqWlICITRWSniGSKyC9P8fyNIlIgIhtcPzdbmUcppdSZWXZIqogEAs8BFwP7gDUistAYs+2kRd81xtxlVQ6llFLus/I8hWFApjEmC0BEFgCXAyeXglJewRhDQUkVh0uqKCiporC0ivLqWsqra6msqQUgKEAIDBQiQoJo3SqI6FbBxEeFkdCmFW3DgxERm9dCqTOzshQSgbwG9/cBw0+x3JUicgGwC7jPGJN38gIiMhuYDdCpUycLoir1nwpKqtiyv5it+cVsO3CcrIIyco+UU+H68j8XrYIDSYkJp0f7KHp2iKJ3QmsGJbehTXhIMyZXqmnsPqP5U2C+MaZKRG4FXgfGnryQMWYOMAcgPT3dtGxE5Q+OlFaxZHchK7OOsCr7KNmFZfXPdY4Jp1tcJOd1j6VzTDjxrcOIjQwlLjKUyLAgWgUHEhoUgAjU1hkcdYayKgfFFTUUV9Rw6HgV+4sqyC+qIKuglLW5x1i4cX/9+3eNi2B4l3aMTo1jVLcYLQllKytLIR9IbnA/yfVYPWPMkQZ3XwaesjCPUv8hp7CMzzcf4Jvth1ifV4Qx0DosiGFd2jFjWDIDktrQO6E1UWHBbr9nUKAQFAhhwYHERIaedrmSyho25xezfm8R63KP8dnGA8xfnUeAwKBObZnUtwOT+nUksU2r5lhVpdwmxljzi7eIBOHcJDQOZxmsAa41xmxtsExHY8wB1+0fAw8ZY0ac6X3T09ONzn2kztXRsmo+2ZDPxxv2szGvCIB+idGM6xXP2LR4+iREExjQ8tv9HbV1bNxXxPe7Cvl62yG2HTgOwKBObbhycBKXDUggupX75aTUyURkrTEmvdHlrCoFV4jJwF+BQOBVY8zvReRxIMMYs1BEngSmAA7gKHC7MWbHmd5TS0Gdiw15RbyxIofPNh2g2lFHn4TWXD4wgUv7J5Dggb+N5xSW8eWWg3y8Pp+dh0oICQpgct8O3HR+F/ontbE7nvJCHlEKVtBSUO6qqa1j4Yb9vL4ih037iokICWTq4CRmjkihZ4cou+O5xRjDlvzjvJeRx0fr8ymtcjCscztmje7C+F7tbRnVKO+kpaD8VrWjjg/X7eO5xZnkHa0gNT6S60emcMWgxLPaP+BpSipreHdNHq8tyyG/qIKUmHBuu7AbVw1JIjhQJydQZ6aloPxOtaOOD9bu47nvMskvqmBAUjT3jk9lTM94nzo/wFFbx1fbDvHSD1lszCuic0w4913cg8v6JxCgIwd1GloKym8YY/j3tkP8/ovt5B4pZ2ByG+4dn8pFPeJ8qgxOZozhm+2H+d+vdrLjYAlpHaJ4YEJPxvfyrRJUzUNLQfmFHQeP88Rn21iWeYTu8ZE8MrkXF/X07TI4WV2d4bPNB/jLv3eRXVjG6NRYfjelD13jIu2OpjyIloLyacUVNfzvop28vSqXqLBg7hufynUjUvx627qjto63
Vuby5692Uemo5ZbRXblrbHfCQ+w+R1V5Ai0F5bO+2nqQX3+8hcLSKmaOSOG+8T1oG6FnAZ9QUFLFk19u58N1+SREh/G7y/tyce/2dsdSNtNSUD7nSGkVv1m4lc82HSCtQxRPXzWAfknRdsfyWGtyjvLox1vYcbCEqYMS+c2UPnoCnB9ztxR0XKm8wqcb9/PYJ1sorXJw/8U9uO3CboQE+e+mIncM7dyOhXedzz++y+S57zJZvucIT13Vnwt6xNkdTXkw/V+lPFp5tYOfv7+Ru+evp1NMBJ/fM5p7xqVqIbgpJCiA+y/uwUd3jCIyLIjrX13Nrz7aTHm1w+5oykPpSEF5rK37i7l7/nqyC8u4a0x3fjY+lSA/3pHcFP2T2vDZ3efzzL93MXdJFmuyj/LCzMF0j/eOM7tVy9H/YcrjGGOYtyybHz+3nNJKB2/PGs6Dl/TUQmiisOBAfjW5F2/eNJyjZdVc9vdlfLR+n92xlIfR/2XKo5RXO7hr/np+++k2zk+N5ct7RzOqe6zdsXzK+amxfHHvaPolRXPfuxt5+MNN9VeOU0o3HymPkXe0nNlvrmXHweM8NDGN2y7s6lcnobWk9q3DeOfm4Tzz7108v3gPm/YVM/f6dI+cMVa1LB0pKI+wYs8RpvxjKfuOlfPqjUO5/aJuWggWCwoM4BcT03j1xnT2Hilnyj+WsX7vMbtjKZtpKSjbvbEih5mvrKJdRAif3HkeY3rG2x3Jr4xNa8+Hd4wiPCSQaXNW8smG/MZfpHyWloKyTV2d4YnPtvHYJ1u5qEccH995ns7XY5PU9lF8fOd5zskEF2zgma92UlfnXSe2quahpaBsUeWo5e4F63llaTY3jurMnOvTvfpaB76gXUQIb80azjXpSTz7bSb3vbeBaked3bFUC9MdzarFFZfXMPvNDFZlH+XhSWnMvkB3KHuKkKAA/nRlf1JiInh60U6Kymt4YeZgnVTPj+hIQbWo/UUVXP3SctbtPcbfpg/k1gt1h7KnERHuHNOdP07tx5LdBVw7dxXHyqrtjqVaiJaCajG5R8q4+sUVHCiq5PWfDuPygYl2R1JnMH1YJ16YOYRtB45z9Usr2F9UYXck1QK0FFSL2FNQyjUvraCs2sH82SP0hDQvcUmfDrxx0zAOFVdy1QvLySksszuSspiWgrLczoMlTHtpJbV1hgWzR9A3Uae79iYjusYwf/YIKmpqmT5npRaDj9NSUJbakl/M9DkrCAyABbNHktahtd2R1DnomxjNO7eMoLq2jmlzVpCtxeCztBSUZbbkF3Pt3JWEhwTx3q0j6R6v5yB4s14dWzP/lhE4ag3TXlrBnoJSuyMpC2gpKEvsPFjCT15ZRVRYMO/eOoKUmAi7I6lm0LNDFPNnj6DOGKbPWUnmYS0GX6OloJpddmEZ1728iuDAAN65ZThJbcPtjqSaUY/2Ucy/ZQTGwMyXV5F3tNzuSKoZaSmoZpV3tJzr5q6kzhjevnm4jhB8VGr7KN66eRgVNbXMfGUVh0sq7Y6kmomWgmo2B4srue7lVZRWOXhz1jBS2+tVvXxZWofWvPbToRSUVHH9K6spLq+xO5JqBloKqlkUl9fwk1dWcaS0itdvGkafBD3s1B8M7tSWudenk1VQxk/nraasSq/97O20FFSTVdbUcssbGeQcKWPu9ekM6tTW7kiqBZ3XPZZnZwxiQ14Rt765liqHXsXNm2kpqCaprTPc/94GVucc5c/XDNQzlf3UxL4deOqqASzNLOShDzZhjE677a106kN1zoxxXg/hi80H+fWPejFlQILdkZSNrhqSxKHjlTy9aCfJ7cJ5YEJPuyOpc2DpSEFEJorIThHJFJFfnmG5K0XEiEi6lXlU85rzQxbzlucw6/wu3Dy6q91xlAe446JuTB+azN+/zeS9NXl2x1HnwLJSEJFA4DlgEtAbmCEivU+xXBRwL7DKqiyq+X26cT9PfrmDH/XvyCOTe9kdR3kIEeGJK/pyQY84Hv5o
Mz/sKrA7kjpLVo4UhgGZxpgsY0w1sAC4/BTLPQH8CdADnb3ExrwiHnx/I+kpbXnmmgEEBOj1ENT/Cw4M4LlrB5EaH8kdb69j2/7jdkdSZ8HKUkgEGo4f97keqycig4FkY8znZ3ojEZktIhkiklFQoL952OlgcSW3vJFBbGQoL/5kCKFBgXZHUh4oKiyY1346lMjQIGa9voaCkiq7Iyk32Xb0kYgEAM8ADzS2rDFmjjEm3RiTHhcXZ304dUoV1c5DT8uqHLxyYzqxkaF2R1IerGN0K16+IZ1j5dXc/tZavd6zl7CyFPKB5Ab3k1yPnRAF9AUWi0gOMAJYqDubPVNdneHB9zeyZX8xf5s+SKfAVm7pmxjN01cNICP3GL9ZuEUPVfUCVh6SugZIFZEuOMtgOnDtiSeNMcVA/UHtIrIYeNAYk2FhJnWOnv12N59vPsAvJ6Uxvnd7u+MoL3LZgAR2HDzOc9/toXfH1vxkZGe7I6kzsGykYIxxAHcBi4DtwHvGmK0i8riITLHqc1Xz+3rbIf769W6mDk7k1gv00FN19h64uCfj0uL53afbWLHniN1x1BmItw3n0tPTTUaGDiZaSk5hGZf9YykpMeF8cNsowoJ1x7I6NyWVNfz4+eUcKa1i4V3nk9xOp1RvSSKy1hjT6OZ5neZCnVZFdS23vbWWwADhheuGaCGoJokKC2bu9ek46gx3vL1O50jyUFoK6pSMMfzqo83sPFTC36YP0t/qVLPoEhvBM9cMZHN+MU98ts3uOOoUtBTUKb25MpeP1udz//geXNhDDwNWzefi3u259cKuvLVyLx+vz2/8BapFaSmo/7I29xiPf7qNcWnx3Dmmu91xlA/6+YSeDOvSjoc/3MzuQyV2x1ENaCmo/1BUXs0989fTsU0Yz0wbqFNYKEsEBQbwjxmDiAgN4ra31urFeTyIloKqZ4zhFx9s4nBJJf+YMZjoVsF2R1I+LL51GM/OGEh2YRm//HCzntjmIbQUVL03VuTy1bZDPDQxjQHJbeyOo/zAqG6xPDChJ59u3M97GTrVtifQUlAAbN1fzO8/387YtHhmnd/F7jjKj9x+YTdGdYvhtwu3kXm41O44fk9LQVFW5eDud9bTNiKY/716ACK6H0G1nIAA4S/TBhIWHMA989fr+Qs201JQPPrJFnKOlPG36YNoFxFidxzlh9q3DuPpqwaw7cBxnvrXTrvj+DUtBT/3yYZ8PlyXz91jUxnRNcbuOMqPje/dnhtGpvDK0mwW7zxsdxy/paXgx/YXVfDox1sY3KkNd4/V8xGU/R6e3Iu0DlE8+P5GDpfoxRjtoKXgp+rqDD//YCOOOsMz1wwkKFD/KSj7hQUH8vcZgyipdPCLDzbpYao20G8CPzVveQ7LMo/w6KW96RwbYXccpeqlto/i4UlpLN5ZwII1ephqS9NS8EO7DpXwx3/tYHyveKYPTW78BUq1sOtHdmZUtxj+57Nt5B0ttzuOX9FS8DPVjjp+tmADUaFBPDm1vx5+qjxSQIDw9NUDCBDhgfc3Ulenm5FaipaCn/nbN7vYduA4T07tR1xUqN1xlDqtxDateOyy3qzOPsqry7LtjuM3tBT8yKZ9Rbz4fRZXD0liQp8OdsdRqlFXDUlifK/2PLVop86m2kK0FPxElaOWn7+/idjIEH59aW+74yjlFhHhyan9iAwN4v73NlJTW2d3JJ+npeAnnvs2k52HSnhyaj+d/VR5lbioUH5/RV825xczd0mW3XF8npaCH9i6v5jnF+9h6qBExqa1tzuOUmdtUr+OTO7Xgb9+vZs9BTppnpW0FHxcTW0dD76/ibYRITx2mW42Ut7rt1P60Co4kIc+2KRHI1lIS8HHPf/dHrYfOM7/XNGXNuE62Z3yXvFRYTx6aW8yco/x1qpcu+P4LC0FH7bzYAn/+G43lw1I4BI92kj5gCsHJ3JBjzj+9OUO9h3Tk9qsoKXgo+rqDA9/uInI0CB+q5uNlI8QEf7w474Y4JGP
tujcSBbQUvBR76zey7q9Rfz6R72JidST1JTvSGobzkMT0/h+VwEfrsu3O47P0VLwQYePV/Knf+1gVLcYpg5OtDuOUs3uJyNSSE9py/98vo2jZdV2x/EpWgo+6HefbqPKUcfvf9xP5zZSPikgQPj9j/tRUungj19utzuOT9FS8DHf7jjE55sPcPeY7nTRKbGVD+vZIYqbR3flvYx9rM4+anccn6Gl4EPKqx08+vFWusdHcuuF3eyOo5Tl7hnXncQ2rXjko81UO3QKjOagpeBD/vr1bvKLKnhyaj9CgvSvVvm+8JAgnriiD7sPl/LyUp0CoznoN4eP2HmwhFeWZjN9aDJDO7ezO45SLWZsWnsm9unAs9/s1gvyNANLS0FEJorIThHJFJFfnuL520Rks4hsEJGlIqIH1J8DYwyPfrKFqLAgHpqYZnccpVrcb6b0JlCExz7RcxeayrJSEJFA4DlgEtAbmHGKL/13jDH9jDEDgaeAZ6zK48sWbtzP6uyj/PySnrSN0KkslP/pGN2K+y7uwXc7C1i09aDdcbyalSOFYUCmMSbLGFMNLAAub7iAMeZ4g7sRgFb8WSqprOH3n2+nf1I004d2sjuOUra5cVRn0jpE8cRn26msqbU7jtdyqxRE5DIROdsCSQTyGtzf53rs5Pe+U0T24Bwp3HOWn+H3nv1mNwWlVTx+eV8CA/ScBOW/ggID+O2UPuQXVfDi93vsjuO13P2inwbsFpGnRKRZN1obY54zxnQDHgJ+faplRGS2iGSISEZBQUFzfrxX232ohNeW5TAtPZmByW3sjqOU7UZ0jeHS/h15YfEe3el8jtwqBWPMTGAQsAeYJyIrXF/UUWd4WT6Q3OB+kuux01kAXHGaz59jjEk3xqTHxcW5E9nnGWN47JOtRIQG8QvduaxUvV9N7kWACL//XM90PhdubxJybf//AOeXd0fgx8A6Ebn7NC9ZA6SKSBcRCQGmAwsbLiAiqQ3u/gjYfRbZ/drnmw+wIusID17Sk3a6c1mpegltWnHnmG78a+tBlu4utDuO13F3n8IUEfkIWAwEA8OMMZOAAcADp3qNMcYB3AUsArYD7xljtorI4yIyxbXYXSKyVUQ2APcDNzRpbfxEZU0tT36xg94dW3PtMN25rNTJbh7dlU7twvntp1upqdUznc9GkJvLXQn8xRjzQ8MHjTHlIjLrdC8yxnwBfHHSY481uH3vWWRVLnN/yCK/qII/XzNAdy4rdQphwYE8dmlvbn4jg9eX53Dz6K52R/Ia7u5TuAHY5RoxXCYiHRo8941l6dR/OVhcyfOL9zCpbwdGdI2xO45SHmtcr3gu6hnH377eTWFpld1xvIa7m49mAauBqcBVwEoRucnKYOrUnlq0g9o6w68m97I7ilIeTUT49Y96U1FTy1+/3mV3HK/h7o7mXwCDjDE3ukYNQ3AeQqpa0Ma8Ij5cl8+s0V1IbhdudxylPF73+EhmjkjhnVV72XWoxO44XsHdUjgCNPwTLXE9plqIMYbHP9tGbGQod47pbnccpbzGveNSiQwN0kNU3eRuKWQCq0TktyLyG2Alzn0M94vI/dbFUyd8uukAa3OP8YtLehIZ6u7xAUqpthEh3DMule93FbB452G743g8d0thD/Ax/z830SdANhDl+lEWqqyp5Y9fbKdPQmuuHJJkdxylvM5PRqaQEhPOH77YjkMPUT0jt37lNMb8DkBEIl33S60Mpf7Ta8ty2F9cyZ+vGaiHoCp1DkKDAnl4Uhq3vbWOdzPyuG54it2RPJa7Rx/1FZH1wFZgq4isFZE+1kZTAEfLqnn+u0zGpcUzspsegqrUubqkTweGdWnHM1/toqSyxu44HsvdzUdzgPuNMSnGmBScZzHPtS6WOuEf32ZSVu3goUk6v5FSTSEiPPqj3hwpq+b5xTqL6um4WwoRxpjvTtwxxizGef0DZaG9R8p5c2UO16Qn06O97rpRqqn6JUVzxcAEXluWzcHiSrvjeCR3SyFL
RB4Vkc6un18DepVsiz391U4CA4T7Lu5hdxSlfMYDE3pSW2f42zd6QtupuFsKNwFxwIfAP4FY12PKIhvzivh0435uGd2V9q3D7I6jlM9IbhfOzBEpvLsmj8zDeszMyRotBde1lh8xxtxjjBlsjBlijPmZMeZYC+TzS8YY/vDFdmIiQph9gU7kpVRzu2tMd8JDgnh60Q67o3icRkvBGFMLnN8CWZTLdzsPsyr7KPeOTyUqLNjuOEr5nJjIUG67sCuLth5ibe5Ru+N4FHc3H60XkYUi8hMRmXrix9JkfqquzvDUv3bSOSacGXqtBKUsc9P5XYiLCuWPX+7AGNP4C/yEu6UQhnOuo7HAZa6fS60K5c8+3bSfHQdLuO/iHgQHun1hPKXUWQoPCeJn41NZk3OMb7br9BcnuDuJzsvGmGUNHxCR8yzI49dqauv4y793kdYhisv6J9gdRymfd016Mq8syeZP/9rBmLR4nTEA90cKf3fzMdUE/1y7j5wj5TwwoScB+o9TKcsFBwbw4CU92X24lE825NsdxyOccaQgIiOBUUDcSbOhtgYCrQzmbyprann2m90MTG7D+F7xdsdRym9M6tuBPgmt+evXu7lsQILfb7ZtbO1DgEic5RHV4Oc4ziuwqWbyzqq97C+u5OeX9ERERwlKtRQR4cEJPdl7tJz3M/bZHcd2ZxwpGGO+B74XkXnGmNwWyuR3yqocPL84k1HdYjive6zdcZTyOxf1jGNwpzb8/dvdTB2cSFiw/24IcXecFCoic0TkKxH59sSPpcn8yLzlORSWVvPgJT3tjqKUXxIRHrykJweKK3ln1V6749jK3aOP3gdeBF4Gaq2L43+KK2p46fs9jEuLZ3CntnbHUcpvjeoWy6huMTy/OJPpw5IJD/HPKxy6O1JwGGNeMMasNsasPfFjaTI/MfeHLI5XOnhggo4SlLLbAxN6UlhazbzlOXZHsY27pfCpiNwhIh1FpN2JH0uT+YHC0ipeXZbNpf070juhtd1xlPJ7Q1LaMjYtnpe+z6K4wj8vxONuKdwA/BxYDqx1/WRYFcpfPP/dHqocddyvU2Mr5THuv7gHxRU1vLLEP68O4FYpGGO6nOJHp+9sgsPHK3l7VS5TByXSNS7S7jhKKZe+idFM6tuB15blUFzuf6OFM5aCiPyiwe2rT3ruD1aF8gcv/ZCFo85w19judkdRSp3knnGplFQ5eHVZtt1RWlxjI4XpDW4/fNJzE5s5i98oKKni7VW5XDEwkZQYvaqpUp6mV8fWXNKnPa8uy+Z4pX+NFhorBTnN7VPdV26auySLakcdd47pZncUpdRp3D02lZJKB/OW5dgdpUU1VgrmNLdPdV+54UhpFW+uyGXKgATdl6CUB+ubGM34XvG8sjSbEj8aLTRWCgNE5LiIlAD9XbdP3O/XAvl8ztwl2VQ6arlrbKrdUZRSjbhnXCrFFTW8scJ/Zvk5YykYYwKNMa2NMVHGmCDX7RP3G71OpIhMFJGdIpIpIr88xfP3i8g2EdkkIt+ISEpTVsbTHS2r5o0VOVzaP4Hu8TpKUMrT9U9qw5ieccxdkkVplcPuOC3CsjliRSQQeA6YBPQGZohI75MWWw+kG2P6Ax8AT1mVxxO8sjSLippa7tYjjpTyGveMS6WovIY3/WS0YOXE4cOATGNMljGmGlgAXN5wAWPMd8aYctfdlUCShXlsVVRezevLc5nctyM92kfZHUcp5aZBndpyQQ/naKG82vdHC1aWQiKQ1+D+PtdjpzML+NLCPLZ6dWk2pVUO7h6nowSlvM2941I5WlbNWyt9f7TgEZcYEpGZQDrw9Gmeny0iGSKSUVBQ0LLhmkFxRQ2vLcthYp8OpHXQOY6U8jZDUtpyfvdY5vyQRUW1b08UbWUp5APJDe4nuR77DyIyHngEmGKMqTrVGxlj5hhj0o0x6XFxcZaEtdJry7Ip0VGCUl7t3vGpFJZW8/Yq3x4tWFkKa4BUEekiIiE4z45e2HABERkE
vISzEA5bmMU2pVUOXl2azfhe7emTEG13HKXUORrauR0ju8Yw54csKmt8d7RgWSkYYxzAXcAiYDvwnjFmq4g8LiJTXIs9jfMa0O+LyAYRWXiat/Na81ft5XilQ+c4UsoH3DmmO4dLqvho/X9t9PAZll5ayBjzBfDFSY891uD2eCs/325VjlpeXprFqG4xDExuY3ccpVQTndc9hv5J0bz0/R6uSU8mMMD3ZvvxiB3NvuqjdfkcOl7F7RfpHEdK+QIR4fYLu5FzpJwvtxywO44ltBQsUltneOmHLPomtub87rF2x1FKNZMJfTrQNTaCFxbvwRjfmwJOS8Eii7YeJLuwjDsu6o6I7w0xlfJXgQHCbRd2Y+v+4yzZXWh3nGanpWABYwzPL86kS2wEl/TpYHccpVQzu3xQAh1ah/H84ky7ozQ7LQULLM0sZEv+cW69oKtP7ohSyt+FBgVy8+gurMw6yrq9x+yO06y0FCzwwuI9tG8dyo8Hn2lWD6WUN5sxrBPRrYJ5cfEeu6M0Ky2FZrYhr4jle45w8/ldCQ0KtDuOUsoiEaFB3DCqM19tO8TuQyV2x2k2WgrN7IXFmUS3CmbG8E52R1FKWezGUZ1pFRzIi99n2R2l2WgpNKPMwyUs2nqIG0amEBlq6XmBSikP0C4ihOnDkvlkQz75RRV2x2kWWgrN6MXvswgLDuCGUZ3tjqKUaiE3j+4KwNwffGO0oKXQTA4WV/Lx+nympScTExlqdxylVAtJbNOKywcm8u6aPIrKq+2O02RaCs1k3vIc6oyp/61BKeU/brmgCxU1tby9aq/dUZpMS6EZlFU5eGdVLhP7diC5XbjdcZRSLSytQ2tGp8Yyb3kOVQ7vnlZbS6EZvJ+Rx/FKh44SlPJjt4zuSkFJFZ9u9O6J8rQUmqi2zvDqshwGd2rD4E5t7Y6jlLLJ6NRYeraP4uUlWV49UZ6WQhP9e9tB9h4t5xYdJSjl10SEWaO7sONgCUszvXeiPC2FJpq7JJtO7cKZoBPfKeX3Lh+YQFxUKC8vybY7yjnTUmiCdXuPsTb3GDed11knvlNKERoUyA0jU/h+VwG7vHTqCy2FJnhlSTatw4K4Oj3Z7ihKKQ9x3fAUwoIDeHmJd57MpqVwjvKOOi+B5m/nAAAO9ElEQVTHd+3wFCJ0SgullEvbiBCuGpLEx+v3c7ik0u44Z01L4Ry9uiybABFu1CktlFInuem8LtTU1fHWily7o5w1LYVzUFxRw3tr8rhsQAIdosPsjqOU8jBd4yIZl9aeN1fmUlHtXSezaSmcgwWr91JWXcvNo7vYHUUp5aFuGd2FY+U1/HPdPrujnBUthbNUU1vHvOU5jOoWQ5+EaLvjKKU81LAu7eifFM2rS7Opq/Oek9m0FM7Soq0HOVBcyazzdZSglDo9EWHW+V3IKizj+90Fdsdxm5bCWZq3LIeUmHDG9Iy3O4pSysNN6tuR+KhQ5i3LsTuK27QUzsKW/GIyco9x/cjOBOjJakqpRoQEBXDdcOfJbFkFpXbHcYuWwlmYtzyH8JBArk5PsjuKUspLXDu8E8GBwhtecniqloKbjpRWsXDjfq4cnETrsGC74yilvERcVCiX9k/g/Yw8Sipr7I7TKC0FNy1Yk0e1o44bRqXYHUUp5WVuHNWZsupa/rnW8w9P1VJwQ01tHW+uyGV0aizd46PsjqOU8jIDktswMLkNr6/I9fjDU7UU3LBo60EOHq/khpGd7Y6ilPJSPz2vM9mFZfzg4YenWloKIjJRRHaKSKaI/PIUz18gIutExCEiV1mZpSleX55Dp3bhjEnTw1CVUudmUt+OxEWFMm95jt1RzsiyUhCRQOA5YBLQG5ghIr1PWmwvcCPwjlU5mmpLfjFrco5x/cgUvWaCUuqcOQ9P7cTinQVkF5bZHee0rBwpDAMyjTFZxphqYAFwecMFjDE5xphNQJ2FOZrk9eU5tAoO1GsmKKWa7MThqa978GjBylJIBPIa3N/nesxrHCmt
4pON+5k6OJHoVnoYqlKqaeKjwvhRv458sHYfpVUOu+OcklfsaBaR2SKSISIZBQUtt5PmxGGoes0EpVRzufG8LpRWOTz28FQrSyEfaLjNJcn12FkzxswxxqQbY9Lj4uKaJVxjamrreGtlLud1jyG1vR6GqpRqHgOT2zAguQ2vr8jxyMNTrSyFNUCqiHQRkRBgOrDQws9rVl9vO8SBYj0MVSnV/G4clUJWQRnL9hTaHeW/WFYKxhgHcBewCNgOvGeM2Soij4vIFAARGSoi+4CrgZdEZKtVec7WW6tySYgOY1yv9nZHUUr5mMn9OtIuIoS3VnrefEiWXnHeGPMF8MVJjz3W4PYanJuVPEpWQSnLMo/wwMU99DBUpVSzCw1yTqz58pJsDhZXetRlfb1iR3NLe3vVXoIChGnD9DBUpZQ1rhuWQp0xzF+91+4o/0FL4SSVNbV8sHYfl/TpQHyU57S3Usq3dIoJ54LUOBas2Yuj1nNO1dJSOMlnmw5QXFHDdSM62R1FKeXjZo5I4dDxKr7eftjuKPW0FE7y1spcusVFMLJrjN1RlFI+bmxaPAnRYby9ynN2OGspNLAlv5gNeUVcNzwFEd3BrJSyVmCAMGNYJ5bsLvSY+ZC0FBp4e1UuYcEBXDnE4w6IUkr5qGlDkwkKEN7xkNGCloLL8coaPl6/nykDEnSeI6VUi4lvHcaEPu15f+0+Kmtq7Y6jpXDCR+vyqaipZeYIvdymUqplzRyeQlF5DZ9vOmB3FC0FAGMMb6/KpX9SNP2T2tgdRynlZ0Z2i6FrXIRH7HDWUgDW5Bxj16FSZg7XUYJSquWJCNcNT2Hd3iK27T9uaxYtBZyHoUaFBXHpgI52R1FK+amrBicRFhzAWzaPFvy+FApLq/hyywGuHJxEeIilU0EppdRpRYcHc1n/BD5en09JZY1tOfy+FD5Yu4+aWsNMPYNZKWWz60akUF5dyycb9tuWwa9LwRjDu2vyGNa5Hd3j9UI6Sil7DUiKplfH1ry7Jq/xhS3i16WwKvso2YVlTBuqs6EqpewnIkwfmszm/GK25BfbksGvS+HdNXlEhQUxuZ/uYFZKeYYrBiYSGhRg22jBb0uhuLyGLzYf4IqBibQKCbQ7jlJKAc4dzpP7deTjDflUVLf8Gc5+Wwofrd9HlaOO6XohHaWUh5k+NJmSSgefb275M5z9shSMMSxYk0e/xGj6JETbHUcppf7DsC7t6BobwbtrWv6qbH5ZChv3FbPjYImOEpRSHklEmDY0mTU5x8g8XNKin+2XpfDumr20Cg5kyoAEu6MopdQpXTkkiaAAafEdzn5XCmVVDhZu2M+l/TsSFaZTZCulPFNsZCgX927PP9flU+VouR3OflcKn23aT1l1rW46Ukp5vOnDOnG0rJqvt7XcNZz9rhTmr84jNT6SwZ3a2h1FKaXO6PzusSS2acWCFtzh7FelsOPgcTbkFTFtaLJeg1kp5fECA4Rr0pNZsruQvKPlLfKZflUKC1bnERIYwNTBeg1mpZR3uDo9iQCB9zJaZoez35RCZU0tH63PZ0Kf9rSLCLE7jlJKuSWhTSsu7BHHexl5OGrrLP88vymFRVsPUlxRw4xhOkW2Usq7TB/WiUPHq/h+V4Hln+U3pRAREsTFvdszsmuM3VGUUuqsjE2LZ2xaPCFB1n9lizHG8g9pTunp6SYjI8PuGEop5VVEZK0xJr2x5fxmpKCUUqpxWgpKKaXqWVoKIjJRRHaKSKaI/PIUz4eKyLuu51eJSGcr8yillDozy0pBRAKB54BJQG9ghoj0PmmxWcAxY0x34C/An6zKo5RSqnFWjhSGAZnGmCxjTDWwALj8pGUuB1533f4AGCd6qrFSStnGylJIBBqegrfP9dgplzHGOIBiQI8ZVUopm3jFjmYRmS0iGSKSUVBg/ckbSinlr6wshXyg4fzUSa7HTrmMiAQB0cCRk9/IGDPHGJNujEmPi4uzKK5SSqkgC997DZAqIl1wfvlPB649
aZmFwA3ACuAq4FvTyNl0a9euLRSR3LPIEQsUnsXyvkLX2//467rrersnxZ2FLCsFY4xDRO4CFgGBwKvGmK0i8jiQYYxZCLwCvCkimcBRnMXR2Pue1VBBRDLcOYvP1+h6+x9/XXdd7+Zl5UgBY8wXwBcnPfZYg9uVwNVWZlBKKeU+r9jRrJRSqmX4QynMsTuATXS9/Y+/rruudzPyullSlVJKWccfRgpKKaXc5DOl4K+T77mx3veLyDYR2SQi34iIW4elebrG1rvBcleKiBERnzg6xZ31FpFrXH/nW0XknZbOaAU3/p13EpHvRGS969/6ZDtyNjcReVVEDovIltM8LyLyrOvPZZOIDG7yhxpjvP4H5yGve4CuQAiwEeh90jJ3AC+6bk8H3rU7dwut9xgg3HX7dn9Zb9dyUcAPwEog3e7cLfT3nQqsB9q67sfbnbuF1nsOcLvrdm8gx+7czbTuFwCDgS2neX4y8CUgwAhgVVM/01dGCv46+V6j622M+c4YU+66uxLnmeXezp2/b4AncM68W9mS4SzkznrfAjxnjDkGYIw53MIZreDOehugtet2NLC/BfNZxhjzA85zuE7ncuAN47QSaCMiHZvymb5SCv46+Z47693QLJy/VXi7RtfbNYxONsZ83pLBLObO33cPoIeILBORlSIyscXSWced9f4tMFNE9uE8N+rulolmu7P9DmiUpSevKc8hIjOBdOBCu7NYTUQCgGeAG22OYocgnJuQLsI5KvxBRPoZY4psTWW9GcA8Y8yfRWQkzpkS+hpj6uwO5m18ZaTQbJPveRl31hsRGQ88AkwxxlS1UDYrNbbeUUBfYLGI5ODc1rrQB3Y2u/P3vQ9YaIypMcZkA7twloQ3c2e9ZwHvARhjVgBhOOcG8nVufQecDV8phfrJ90QkBOeO5IUnLXNi8j1wc/I9L9DoeovIIOAlnIXgC9uXoZH1NsYUG2NijTGdjTGdce5LmWKMybAnbrNx59/5xzhHCYhILM7NSVktGdIC7qz3XmAcgIj0wlkK/jDP/kLgetdRSCOAYmPMgaa8oU9sPjIWTb7n6dxc76eBSOB91371vcaYKbaFbgZurrfPcXO9FwETRGQbUAv83Bjj1SNiN9f7AWCuiNyHc6fzjT7wSx8iMh9nyce69pf8BggGMMa8iHP/yWQgEygHftrkz/SBPzellFLNxFc2HymllGoGWgpKKaXqaSkopZSqp6WglFKqnpaCUkqpeloKyqeJSK2IbBCRLSLyvoiE25TjZ3Z9tlJnQ0tB+boKY8xAY0xfoBq4zd0XikhgM+b4GaCloDyeloLyJ0uA7uCcC0pEVrtGES+dKAARKRWRP4vIRmCkiAwVkeUistG1fJSIBIrI0yKyxjWH/a2u114kIotF5AMR2SEib7vONL0HSAC+E5HvXMu+ICIZrmse/O5EQBGZ7HrtWtc8+Z+5Ho9wza2/2nXNgFPNCqtUk2kpKL/gmu9qErDZNQ3CNOA8Y8xAnGf+XudaNALnnPQDgNXAu8C9rvvjgQqc8+wUG2OGAkOBW0Ski+v1g3COCnrjnP//PGPMszinch5jjBnjWu4RY0w60B+4UET6i0gYzilJJhljhgBxDVbhEZxTswzDeY2Mp0Ukojn/jJQCH5nmQqkzaCUiG1y3l+Cc7mQ2MARY45r6oxVwYl6oWuCfrts9gQPGmDUAxpjjACIyAegvIle5lovGOelcNbDaGLPPtdwGoDOw9BS5rhGR2Tj/D3bEWSIBQJZrIjuA+a6sABOAKSLyoOt+GNAJ2H6Wfx5KnZGWgvJ1Fa7RQD3XxZVeN8Y8fIrlK40xtY28pwB3G2MWnfS+FwENZ6Gt5RT/x1yjigeBocaYYyIyD+eXfGOfeaUxZmcjyynVJLr5SPmjb4CrRCQeQETayamvXb0T6CgiQ13LRbk2Qy0CbheRYNfjPdzYlFOCc0pvcF4hrAwoFpH2ODdrnfi8rvL/1w+f1uD1i4C7XYV2YvZbpZqd
jhSU3zHGbBORXwNfifOCPDXAnUDuSctVi8g04O8i0grn/oTxwMs4Nwutc31JFwBXNPKxc4B/ich+Y8wYEVkP7MB51axlrs+rEJE7XMuV4Zwy+oQngL8Cm1yZs4FLz/kPQanT0FlSlfIgIhJpjCl1lc1zwG5jzF/szqX8h24+Usqz3OLaQb0V5w7sl2zOo/yMjhSUUkrV05GCUkqpeloKSiml6mkpKKWUqqeloJRSqp6WglJKqXpaCkopper9H9+K5jawONnrAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1063692b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def gini(p):\n",
    "    # 1 - (p^2 + (1-p)^2) = 1 - p^2 - (1 - p)^2 = -2p^2 + 2p\n",
    "    return -2 * p**2 + 2 * p\n",
    "\n",
    "# Evaluate the binary Gini curve on 200 evenly spaced probabilities in [0.01, 0.99].\n",
    "x = np.linspace(0.01, 0.99, 200)\n",
    "plt.plot(x, gini(x))\n",
    "plt.xlabel('Percentage')\n",
    "# The plotted quantity is the Gini impurity, so the axis is labelled as such.\n",
    "plt.ylabel('Gini')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到基尼系数的曲线跟之前信息熵的曲线形状非常相近：p为0.5时函数达到最大值（此时基尼系数为0.5），p趋近0或1时函数值趋近0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用决策树进行划分，以基尼系数作为标准"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "# Keep only the columns from index 2 onward (the last two features) so the data can be drawn in 2-D.\n",
    "# This notebook only illustrates the splits and does not assess generalization,\n",
    "# so no train/test split is made.\n",
    "X, y = iris.data[:, 2:], iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=1, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Import from the public package path; `sklearn.tree.tree` is a private module\n",
    "# that newer scikit-learn releases no longer expose.\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "# criterion='gini' is already the default; it is spelled out here for emphasis.\n",
    "dt_clf = DecisionTreeClassifier(max_depth=2, criterion='gini')\n",
    "dt_clf.fit(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 绘制决策边界"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/seamonster/MachineLearningClassicAlgorithmEnv/lib/python3.6/site-packages/matplotlib/contour.py:967: UserWarning: The following kwargs were not used by contour: 'linewidth'\n  s)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAHBJJREFUeJzt3XuQXPWZ3vHvOxckGEYMWAIJJCMo2LUxWaRlImNry0WtYwKIMi7WJHIcx3ZRq9TGOJBsVSqgFLfYy26lyo7KOHYpQBZ2vbDmYke7iDXesmq5uLiMZLEGiZQVbCKB0ADSSKMbaGbe/NFnZnrOnJ4+ffp0n0s/n6opun/zO6dfidKr1jnPvG3ujoiIlEtX1gWIiEj61NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURKqG5zN7P5Zvaimb1sZq+a2Z0Re+aZ2V+b2S4ze8HMlreiWBERiSfOO/f3gd9390uAFcCVZnZZaM8NwAF3vwD4NvBn6ZYpIiKNqNvcveJw8LQ3+Ar/5NO1wAPB40eBT5uZpValiIg0pCfOJjPrBrYCFwDfdfcXQlvOAXYDuPuYmR0EPgS8GzrPOmAdQN+8eZdeuHhxc9VLQ947+UNZlyAiTdr92rZ33X1RvX2xmru7jwMrzGwA+JGZXezurzRalLtvBDYCrFy+3LesX9/oKaQJD6z4ctYliEiTbl417404+xpKy7j7CLAFuDL0rTeBZQBm1gOcBrzXyLlFRCQ9cdIyi4J37JjZycBngNdC2zYBk28LPw/8zDWRTEQkM3EuyywBHgiuu3cBP3T3vzWzu4Ahd98E3Af8hZntAvYDa1tWsYiI1FW3ubv7PwIrI9Zvq3p8HLg+3dJERCQp/YSqiEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAnVbe5mtszMtpjZDjN71cxuithzuZkdNLPtwddtrSlXRETi6ImxZwz4Y3ffZmb9wFYz+6m77wjte8bdr0m/RBEpkl0jTzI0fA+Hx/Zxas9ZDJ55IxcMXJXoOCDRuSRGc3f3vcDe4PGome0EzgHCzV1EOtyukSd5Zu83GPfjABwee5tn9n4DYM6mHHXcP7x1B2bGhJ9o6FxS0dA1dzNbDqwEXoj49ifM7GUze9LMPpZCbSJSMEPD90w16Enjfpyh4XsaPs4Zm2rsjZxLKuJclgHAzE4FHgNudvdDoW9vA85198NmdjXwY+DCiHOsA9YBLD3jjMRFi0g+HR7b19B63O8n3dvJYr1zN7NeKo39B+7+ePj77n7I3Q8HjzcDvWa2MGLfRncfdPfBhf39TZYuInlzas9ZDa3H/X7SvZ0sTlrGgPuAne7+rRp7Fgf7MLNVwXnfS7NQEcm/wTNvpNvmz1jrtvlTN0cbOc7ooct6Gz6XVMS5LLMa+BLwSzPbHqzdCnwYwN2/D3we+CMzGwOOAWvd3VtQr4jk2OSNzkYTLrWOS3IuqYiTlnkWsDp77gF0l0MkZ5LGEuN67q27eW3kcZwJjC4+MnAdq8++JdFrXDBwVeRxaubJxL6hKiLFkjSWGNdzb93NzpFHp547E1PPV599S9Pnl+Zo/IBISSWNJcb12sisbMWc69Jeau4iJZU0lhiXM9HQurSXmrtISSWNJcZlNdpHrXVpL/1fECmppLHEuD4ycF1D69JeuqEqUlJJY4lxTd40jUrLSPbU3EU6TNzpi1Fr4b8YVp99y6xmHid+GTei2eooZ1J5rauamrtIScWdtPj03jtxd5yxOfclne4YPi5uRLPVUc6k8lpXmK65i5RU3EmLE35iqrHP
tS/pdMfwcXEjmq2OciaV17rC1NxFSqoV0xOTTnesXo8b0Wx1lDOpvNYVpuYuUlKtmJ6YdLpj9XrciGaro5xJ5bWuMDV3kZKKO2mxy3qx0O23pBMZ48Qv40Y0Wx3lTCqvdYXphqpISTUyaTHuWtLpjtXHxY1otjrKmVRe6wqzrCbzrly+3LesX5/Ja3eqB1Z8OesSpGSKHmmMkvdab141b6u7D9bbp3fuIpJI0SONUYpUaz265i4iiRQ90hilSLXWo+YuIokUPdIYpUi11qPmLiKJFD3SGKVItdaj5i4iiRQ90hilSLXWoxuqIpJI0SONUYpUaz1q7iKSWK0PtU66Lw+KVOtc1NxFJFI4773s1N9j9+FnU/nhp7xkyfNSRyuouYvILFF5750jj059v5HxwUlH/rZaXupoFd1QFZFZovLeYXHHBycd+dtqeamjVdTcRWSWtHPdSUb+tlpe6mgVNXcRmSXtXHeSkb+tlpc6WkXNXURmicp7h8UdH5x05G+r5aWOVtENVRGZJSrvnVZaJi9Z8rzU0Sp1R/6a2TLgQeAswIGN7r4htMeADcDVwFHgK+6+ba7zauRv+2nkb7m0MqpYdM1EHPMej0xz5O8Y8Mfuvs3M+oGtZvZTd99Rtecq4MLg6+PA94L/ikgLxIkqRsUSo+KLZYr/QXMRxzLFI+tec3f3vZPvwt19FNgJnBPadi3woFc8DwyY2ZLUqxURIF5UMSqWGBVfLFP8D5qLOJYpHtnQDVUzWw6sBF4IfescYHfV8z3M/gsAM1tnZkNmNvTu6GhjlYrIlFZGFYuumYhjmeKRsZu7mZ0KPAbc7O6HkryYu29090F3H1zY35/kFCJCa6OKRddMxLFM8chYzd3Meqk09h+4++MRW94EllU9XxqsiUgLxIkqRsUSo+KLZYr/QXMRxzLFI+veUA2SMPcBO939WzW2bQJuNLOHqdxIPejue9MrU0SqtTKqWHTNRBzLFI+Mk5ZZDXwJ+KWZbQ/WbgU+DODu3wc2U4lB7qIShfxq+qWKSLXwaNpdI0+y+/CzM/bsO7qdI2PDgHNkbJh9R7ez+uxbcjulMSxpXc2M7e2Ykb/u/ixgdfY48LW0ihKRxkRF+P7hrdtxxqf2OBNTccnVZ98y57F5iP/lta6i0PgBkRKIivBVN/Zqr43MvG2W1/hfXusqCjV3kRJoJKrnTMQ6Nuv4X17rKgo1d5ESaCSqZ6E/9nmN/+W1rqJQcxcpgagIn9EdufcjA9fVPTYP8b+81lUUmgopUgK1Inz7jm7ntZHHcSYwuvjIwHUzbqbOdWzWNy3zWldRqLmLFMArv/pDXvxgKxNU/rm96qRLOdh3/qzGvfa3nph17GT2va/nTM46ZUXk+ePE/5576+5Zr3fWKStiNd84kcZae+I087xGObOk5i6Sc6/86g95/oOtYJVE8gRUnp+YnqodFXNMM0r43Ft3z5g6Ofl6O0ceozIJvPb549ShSY7p0zV3kZx7saqxTwk/D1THHNOMEobjk9Nmfh5E1Pnj1KFJjulTcxfJuYn6W6ZUxxzTjBKG45NzCZ8/Th2a5Jg+NXeRnGvkD2l1zDHNKGE4PjmX8Pnj1KFJjulTcxfJuVUnXQrhj8Os8fGY1THHNKOE4fjktJmXh6LOH6cOTXJMn26oiuTcxRf+T4iZlqmOOaYZJZw8b5K0TJw6NMkxfXU/ILtV9AHZ7acPyBYpvjQ/IFtK4qaP/WXWJXS8J97YyYZXnuPto6MsPqWfmy5ezZpzP5rOyXcMwdOb4dABWHA6fOpquKhuD5CCuTnmPjV3kTZ54o2d3LH17zk+XvnQ6r1HR7lj698DNN/gdwzB3/0QxoIPvz50oPIc1OA7lG6oirTJhleem2rsk46Pj7HhleeaP/nTm6cb+6SxE5V16Uhq7iJt8vbR0YbWG3LoQGPrUnpq7iJtsviU/obWG7Lg9MbWpfTU3EXa5KaLVzO/e+ZtrvndPdx08ermT/6pq6Gnd+Za
T29lXTqSbqiKtMnkTdOWpGUmb5oqLSMBNXeRNlpz5Ahrdr813YDPOzJ7U9JI40WDs/flJB7Z0gioRFJzF2mXOHHFNCONOYlHtjQCKjXpmrtIu8SJK6YZacxJPLKlEVCpSc1dpF3ixBXTjDTmJB7Z0gio1KTmLtIuceKKaUYacxKPbGkEVGpScxdplzhxxTQjjTmJR7Y0Aio16YaqSLvEiSumGWnMSTyypRFQqaluczez+4FrgGF3vzji+5cD/xv4dbD0uLvflWaRIrnSTLxwy9/AkUOVx4cOwFOPzT5XXE89Ai8/Dz4B1sUTH/0nbLCxGQ2Uvj42LDubt4/2V9b6+ljT+K+4aWvO/aiaeZvFeef+58A9wINz7HnG3a9JpSKRPGsmXvjd26cb+6QPjle+Js/1xEPQZTA+Pvf5n3oEtv986ukTp8znjiPvcbyrcqV179FR/stLP8HMODExMbWmCGLnqHvN3d2fBva3oRaR/GsmXhhu7FF8Yrqxz3X+l5+f8XTD6QNTjX3qMPepxj5JEcTOkdYN1U+Y2ctm9qSZfazWJjNbZ2ZDZjb07qhiUFJAWcULw+f3mU377Z7u2KdSBLEzpNHctwHnuvslwHeAH9fa6O4b3X3Q3QcX9isGJQWUVbwwfH6b+Ud38Vjo3f4cFEHsDE03d3c/5O6Hg8ebgV4zW9h0ZSJ51Ey8sG9B/T3WBd2hd+FR57/kshlPbzowwvzQJZgeM3pDl2oUQewcTTd3M1tsZhY8XhWc871mzyuSSxcNwpX/Yvqd9ILTK8/jpGW+dufsBn/S/JnnWvMFuGpt/fNfcT2s+OTUO/g1R49zR9+HWHJKPwYsOaWfb/zTf85/Hbxixtodl/4z3UztEHGikA8BlwMLzWwPcDvQC+Du3wc+D/yRmY0Bx4C17u4tq1gkLWlOX4wSiipyyWWVBh9VQ7U9v4bRg5XHowcrz2F2rVdcX/kKrAm+wlrZzDXtMb8sqz68cvly37J+fSav3akGvhj/plvphSONULn8EfddeD2hqOKUFZ+cbshRNVjXrJulketp1ppQeNojVC776F8HrWV9N2x197r/4zV+QDpTqycmhqKKketRNUQ19qj1HHz4taY95puau3SmVkca4zTpZl8r4w+/1rTHfFNzl87U6kij1fijVb3e7Gtl/OHXmvaYb2ru0plaPTExFFWMXI+qIc5fCpCLD7/WtMd801RI6Uytnpg4edM0nJapSrfUrGHPr2cft/S8zKc7hmnaY74pLdNBlJYRKb64aRm9c5dia2b8bpSH/wf8v19NP+9bAEcPz3wXDbPfWUetRb3bhplr538UXt+Zq3fkUg56595BSvfOPe2serixN8sMqv98dXfDhNdO0kAu8uuSb8q5S/mlnVVPs7HDzMYOlVG+czV2yEV+XcpBzV2KK6vxu61W9PolF9TcpbiyGr/bakWvX3JBzV2KK+2s+ocvbL6mapVhqdO6u2vn2CflIL8u5aDmLsXVzPjdKGv/3ewG37dguiFbV2XwV9Wo3TnX1vyrmbVdtbYy0rd6bcUn06tfpIqikFJsccfvRomKUf7OKhh5d+5o4o6h6fhi/2mVyONFgzN/QGlyX5J60453SkdSc5fOFI5RHjoATzwEXTb9AdWHDlT2wHRzjTouvKeRfXHqinOcSIguy0hnqjVudzz0WaThaGLc+GXSmGarRxFLx1Bzl87USNywem/c+GXSmGZZ453Sdmru0pkaiRtW740bv0wa0yxrvFPaTs1dOlOtcbvdoREN4Whi3Phl0phmq0cRS8fQDVXpTLXG7UatVd/IjDsqOOlI4VaPIpaOoeYu6cprjC+qrihxoopx45dJY5rNxDtFAmrukp68xvii6nry4ZkTGvNSq0hKdM1d0pPXGF9UXVETGvNQq0hK1NwlPXmN8SWNPYoUmJq7pCevMb6ksUeRAlNzl/TkNcYXVVfUhMY81CqSEt1QlfTkNcaXNPYoUmB1m7uZ3Q9cAwy7+8UR
3zdgA3A1cBT4irtvS7tQKYikMb6nHpn9AdPhKYtx99WKY0bVFTXxUR9gLSUQ57LMnwNXzvH9q4ALg691wPeaL0s6ylOPwPafT6dXfKLy/KlHGt83GXucvDE6GXGMGr8bFnXs9p8nO5dIxuo2d3d/Gtg/x5ZrgQe94nlgwMyWpFWgdICXn4+3HmdfM3HMqGPDFJeUgkjjhuo5wO6q53uCtVnMbJ2ZDZnZ0Lujoym8tJRCOG9eaz3OvmbimHFjkIpLSgG0NS3j7hvdfdDdBxf297fzpSXPan2uaHg9zr5m4phxY5CKS0oBpNHc3wSWVT1fGqyJxHPJZfHW4+xrJo4ZdWyY4pJSEGk0903Av7GKy4CD7r43hfNKp7ji+ugPmA6nYOLsa+ZDs6OO1QdYS0GZu8+9wewh4HJgIbAPuB3oBXD37wdRyHuoJGqOAl9197pxgpXLl/uW9eubKl4aM/DF7vqbRCTXrO+Gre5e9x1G3Zy7u3+hzvcd+FoDtYmISItp/ICISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlFDdD8iW+jaPnsx39vfz9lg3i3vG+foZo1zdfyzrskSkg6m5N2nz6Mnc9c5pHPfKP4L2jvVw1zunAajBi0hmdFmmSd/Z3z/V2Ccd9y6+s78/o4pERNTcm/b2WHdD6yIi7aDm3qTFPeMNrYuItIOae5O+fsYo821ixtp8m+DrZ4xmVJGIiG6oNm3ypqnSMiKSJ7Gau5ldCWwAuoF73f1PQ9//CvDfgDeDpXvc/d4U68y1q/uPqZmLSK7Ube5m1g18F/gMsAd4ycw2ufuO0Na/dvcbW1BjISn7LiJZinPNfRWwy91fd/cPgIeBa1tbVrFNZt/3jvXg2FT2ffPoyVmXJiIdIk5zPwfYXfV8T7AW9gdm9o9m9qiZLUuluoJS9l1EspZWWuZvgOXu/jvAT4EHojaZ2TozGzKzoXdHy5smUfZdRLIWp7m/CVS/E1/K9I1TANz9PXd/P3h6L3Bp1IncfaO7D7r74ML+8r6LVfZdRLIWp7m/BFxoZueZ2UnAWmBT9QYzW1L19LPAzvRKLB5l30Uka3XTMu4+ZmY3Aj+hEoW8391fNbO7gCF33wT8ezP7LDAG7Ae+0sKac0/ZdxHJWqycu7tvBjaH1m6renwLcEu6pbVX3Ojiuj1n8OL786aer5r3Pp877disY9N8TRGRRuknVIk/tne6sdvU2ovvz+Ol4Xl4sLZ3rIfbhwdwnDFqn0+jgkWklTRbhvjRxXBjr7Cpxj7pBDbV2GudT3FJEWklNXfaF12sPp/ikiLSSmrutC+6WH0+xSVFpJXU3IkfXVw1733AQ0c7Flrrxelh7vMpLikiraTmTuUG5m2LDrKkZwzDWdIzxm2LDs66sblx6f6qBl/5WjXvfb555siMY+88c4S7zpz7fHFfU0QkCaVlAr841su+sW4c2DfWzS+O9fLjgydHxh537++Zii9+7rRjkSN/4wwJ06hgEWkVNXfgm8MLeGS0j8kkzAQEz6Fe7DEqvqiYo4hkTZdlgMeqGvs0i1wLxx6j4ouKOYpI1tTcIXTrs3Hh+KJijiKSNTV3mv9NCMcXFXMUkaypuQN/0H+EqIhjnNhjVHxRMUcRyZqaO7D+zENc33+ErqChd+Fc338kVuwxKr6omKOIZK10aZm4kxa/ObyAx0b7mKDyN1wfE1PX3ieArUdP4o3xmb89W98/iZ3DvYxODQTr5u7hBXz7nX7e8enr6YtsnP+wqP67dE2FFJFWKVVzjxtBjIo+Vhr2dBLm9fHe4NH02jgwGkrRjNLFqM/c9453c+vwwNSapkKKSLuV6rJM3Ahi7ehj+Hl6a5oKKSLtVKrmHjeC2Gz0MSlNhRSRdilVc48bQczq
F62pkCLSLqVq7nEjiLWjj+Hn6a1pKqSItFOpmnvcCGJU9LGfCapjj+d3n6C76jk43RH7+plgkY3PWFtk4/xJncik4pIi0krmHn7X2R4rly/3LevXZ/LanWrgi7qeL1J01nfDVncfrLevVFHIWpLmyaOOu/e9vqqYJJzffYLHl7/byvJFRBpWqssyUSbz5HvHenBsKk9eb9561HG3Dg8Ejd2mvl4f7+W63yxsxy9FRCS20jf3pHnyqONqZdqr38mLiORB6Zt70jy58uYiUmSlb+5J8+TKm4tIkZW+uSfNk0cdVyvTfn73iabrFBFJU+mbe9I8edRxf3LmSNDIZ+bhlZYRkbyJFYU0syuBDUA3cK+7/2no+/OAB4FLgfeAf+nuv0m31OSu7j+W6IeDoo7TDxmJSBHUfeduZt3Ad4GrgIuAL5jZRaFtNwAH3P0C4NvAn6VdqIiIxBfnsswqYJe7v+7uHwAPA9eG9lwLPBA8fhT4tJmFM4MiItImcS7LnAPsrnq+B/h4rT3uPmZmB4EPATMuRpvZOmBd8PTw6evW/Z8kRce0MPz6BZN+/evqb0mRfv+zU+TaQfXXc26cTW0dP+DuG4GN7XgtMxuKM38hr1R/topcf5FrB9WfljiXZd4EllU9XxqsRe4xsx7gNCo3VkVEJANxmvtLwIVmdp6ZnQSsBTaF9mwCvhw8/jzwM89q3KSIiNS/LBNcQ78R+AmVKOT97v6qmd0FDLn7JuA+4C/MbBewn8pfAFlry+WfFlL92Spy/UWuHVR/KjKb5y4iIq1T+p9QFRHpRGruIiIlVLrmbmb3m9mwmb2SdS1JmNkyM9tiZjvM7FUzuynrmuIys/lm9qKZvRzUfmfWNSVhZt1m9gsz+9usa2mUmf3GzH5pZtvNbCjrehplZgNm9qiZvWZmO83sE1nXFJeZ/Xbw+z75dcjMbs6snrJdczezTwGHgQfd/eKs62mUmS0Blrj7NjPrB7YCn3P3HRmXVlfwU8l97n7YzHqBZ4Gb3P35jEtriJn9R2AQWODu12RdTyPM7DfAoLsX8oeAzOwB4Bl3vzdI553i7iNZ19WoYGzLm8DH3f2NLGoo3Tt3d3+aSmKnkNx9r7tvCx6PAjup/ARw7nnF4eBpb/BVqHcPZrYUWAPcm3UtncbMTgM+RSV9h7t/UMTGHvg08H+zauxQwuZeJma2HFgJvJBtJfEFlzS2A8PAT929MLUH/jvwn4DwMP+icOApM9sajPsokvOAd4D/FVwWu9fM+rIuKqG1wENZFqDmnlNmdirwGHCzux/Kup643H3c3VdQ+UnmVWZWmEtjZnYNMOzuW7OupQm/5+6/S2WK69eCy5RF0QP8LvA9d18JHAH+c7YlNS64nPRZ4JEs61Bzz6HgevVjwA/c/fGs60ki+Of0FuDKrGtpwGrgs8F164eB3zezv8y2pMa4+5vBf4eBH1GZ6loUe4A9Vf/ae5RKsy+aq4Bt7r4vyyLU3HMmuCl5H7DT3b+VdT2NMLNFZjYQPD4Z+AzwWrZVxefut7j7UndfTuWf1T9z93+dcVmxmVlfcBOe4HLGFUBhUmPu/jaw28x+O1j6NJD7IEGEL5DxJRlo81TIdjCzh4DLgYVmtge43d3vy7aqhqwGvgT8Mrh2DXCru2/OsKa4lgAPBEmBLuCH7l64OGGBnQX8KPgohR7gr9z977ItqWFfB34QXNp4HfhqxvU0JPhL9TPAv828lrJFIUVERJdlRERKSc1dRKSE1NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURK6P8DQvphtANyYsAAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1094b96d8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from playML.plot_utils import plot_decision_boundary\n",
    "\n",
    "# Draw the fitted tree's decision regions with the playML helper.\n",
    "# NOTE(review): axis presumably means [x_min, x_max, y_min, y_max] - confirm against playML.\n",
    "plot_decision_boundary(dt_clf, axis=[0.5, 7.5, 0, 3])\n",
    "# Overlay the samples with one scatter call per class label.\n",
    "for label in (0, 1, 2):\n",
    "    members = y == label\n",
    "    plt.scatter(X[members, 0], X[members, 1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 手写模拟使用基尼系数进行划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split(X, y, d, value):\n",
    "    \"\"\"\n",
    "    这个函数不是帮我们寻找d和value，而是在已知d和value的情况下，帮我们划分数据集\n",
    "    :param X: \n",
    "    :param y: \n",
    "    :param d:   划分的维度 \n",
    "    :param value:  划分的维度的阈值\n",
    "    :return: \n",
    "    \"\"\"\n",
    "    index_a = X[:,d]<=value\n",
    "    index_b = X[:,d]>value\n",
    "    \n",
    "    return X[index_a],X[index_b],y[index_a],y[index_b]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算基尼系数\n",
    "from collections import Counter\n",
    "\n",
    "# y里边有每个样本的分类信息，所以足以让我们计算基尼系数\n",
    "def gini(y):\n",
    "    # Counter返回一个字典，key为y的类别，value为该类别出现次数\n",
    "    counter = Counter(y)\n",
    "    # 基尼系数的右半边\n",
    "    res = 0.0\n",
    "    for num in counter.values():\n",
    "        # p就是该类别出现概率：出现次数/总数\n",
    "        p = num/len(y)\n",
    "        res += p**2\n",
    "    return 1 - res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for the feature index and threshold of the best single split.\n",
    "def try_split(X, y):\n",
    "    \"\"\"Exhaustively search for the split with the lowest weighted Gini impurity.\n",
    "\n",
    "    Candidate thresholds for each feature are the midpoints between consecutive\n",
    "    distinct values of that feature in sorted order.\n",
    "\n",
    "    :param X: 2-D array; X.shape[0] rows (samples) by X.shape[1] columns (features)\n",
    "    :param y: labels aligned with the rows of X\n",
    "    :return: (best_gini, best_d, best_value); stays (inf, -1, -1) when no feature\n",
    "             has two distinct values, i.e. when no split is possible\n",
    "    \"\"\"\n",
    "    # Start from +inf so that the first candidate always becomes the best so far.\n",
    "    best_gini = float('inf')\n",
    "    best_d, best_value = -1, -1\n",
    "    # Float so that the weighting below is a true division on Python 2 as well.\n",
    "    n_samples = float(len(y))\n",
    "\n",
    "    for d in range(X.shape[1]):\n",
    "        sorted_index = np.argsort(X[:, d])\n",
    "        for i in range(1, X.shape[0]):\n",
    "            lower = X[sorted_index[i - 1], d]\n",
    "            upper = X[sorted_index[i], d]\n",
    "            # Equal neighbours cannot be separated by a threshold, so skip them.\n",
    "            if lower != upper:\n",
    "                v = (lower + upper) / 2.\n",
    "                X_l, X_r, y_l, y_r = split(X, y, d, v)\n",
    "                # Weight each child's impurity by its share of the samples; a plain\n",
    "                # sum would ignore how many samples land on each side.\n",
    "                g = (len(y_l) * gini(y_l) + len(y_r) * gini(y_r)) / n_samples\n",
    "\n",
    "                if g < best_gini:\n",
    "                    best_gini, best_d, best_value = g, d, v\n",
    "    return best_gini, best_d, best_value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 我们来看看，第一次划分，在什么维度上，取什么值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_gini =  0.5\nbest_d =  0\nbest_value =  2.45\n"
     ]
    }
   ],
   "source": [
    "# Search the whole dataset for the first split and print what was found.\n",
    "best_gini, best_d, best_value = try_split(X, y)\n",
    "print('best_gini = ', best_gini)\n",
    "print('best_d = ', best_d)\n",
    "print('best_value = ', best_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the first split found above to the whole dataset.\n",
    "X1_l,X1_r,y1_l,y1_r = split(X,y,best_d,best_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gini impurity of the left part of the first split.\n",
    "gini(y1_l)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "y1_l中的样本全部属于类别0（见下面Counter的统计结果），所以没有不确定性，基尼系数是0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({0: 50})"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of samples per class label in the left part of the first split.\n",
    "Counter(y1_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A non-zero Gini impurity means this part can be split further.\n",
    "gini(y1_r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_gini2 =  0.21057149006459386\nbest_d2 =  1\nbest_value2 =  1.75\n"
     ]
    }
   ],
   "source": [
    "# Search the right part of the first split for a second split and print what was found.\n",
    "best_gini2, best_d2, best_value2 = try_split(X1_r, y1_r)\n",
    "print('best_gini2 = ', best_gini2)\n",
    "print('best_d2 = ', best_d2)\n",
    "print('best_value2 = ', best_value2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the second split to the right part of the first split.\n",
    "X2_l,X2_r,y2_l,y2_r = split(X1_r,y1_r,best_d2,best_value2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.16803840877914955, 0.04253308128544431)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gini impurity of each part produced by the second split.\n",
    "gini(y2_l), gini(y2_r)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到基尼系数都没有降为0，还可以继续划分  \n",
    "不过我们现在已经划分了两次，对应的就是前面创建 DecisionTreeClassifier 时设置的 max_depth=2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
